This short Markdown file shows how to query the WsprDaemon Timescale Database using R and SQL.

Setup packages

library(RPostgres)
library(DBI)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(DT)
library(knitr)
library(fuzzyjoin)
library(tmap)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
data(World)
dt_options <- list(pageLength=7, scrollX = "400px")

Connect to the database

The settings are available over here.

db_name <- "wsprnet"
db_host <- "logs2.wsprdaemon.org"  
db_user <- "wdread"  
db_password <- "JTWSPR2008"
db_con <- dbConnect(RPostgres::Postgres(),
                    dbname = db_name,
                    host = db_host,
                    user = db_user,
                    password = db_password)  

Check if it worked:

dbListTables(db_con) 
## [1] "spots"

Yes!

Let’s try an example based on SQL from the WsprDaemon Timescale Databases Notes (Version 2.0 November 2020):

res <- dbSendQuery(db_con,
  "SELECT * FROM spots
   ORDER BY wd_time DESC
   LIMIT 20;")
dbFetch(res) %>%
  datatable(options = dt_options)

Yes – works.

dbClearResult(res)

Grab a time range

All transmisions

Let’s try one day’s worth of data – reception reports of M0INF. There’s a SQL quirk below. Variable names which aren’t all in lower case (such as CallSign) have to be referenced using double quotation marks; the quotes are escaped using the backslash. String literals like a callsign use single quotes.

one_day <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE \"CallSign\" = 'M0INF'
         AND wd_time >= '2021-01-03T00:00:00Z'
         AND wd_time < '2021-01-04T00:00:00Z';")

This fetches the data to a data frame:

the_dat <- dbFetch(one_day)
dbClearResult(one_day)
the_dat %>%
  datatable(options = dt_options)

Now plot:

the_dat %>%
  ggplot(aes(wd_time, distance)) +
  geom_point() +
  geom_smooth(method = "gam",
              formula = y ~ s(x, k = 20)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports of M0INF",
       subtitle = "3 Jan 2021, 20m band") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")

Both transmissions and reception reports

Let’s do the same again; however, now with reception reports both by and of M0INF.

one_day_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-03T00:00:00Z'
   AND wd_time <  '2021-01-04T00:00:00Z';")
the_dat_send_rec <- dbFetch(one_day_send_receive)
dbClearResult(one_day_send_receive)
the_dat_send_rec <- the_dat_send_rec %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))

The graph is a bit crowded so the y-axis is on the \(\log_{10}\) scale.

the_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  stat_smooth(geom="line",
              alpha = 0.5, size = 0.8,
              method = "gam",
              formula = y ~ s(x, k = 25)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Which of those reports are over 5000 km?

the_dat_send_rec %>%
  filter(distance > 5000) %>%
  group_by(CallSign, Reporter, distance) %>%
  summarise(Spots = n()) %>%
  arrange(desc(Spots)) %>%
  kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
CallSign Reporter distance Spots
VK3MO M0INF 16880 7
M0INF KD2OM 5635 2
M0INF PT2FHC 8786 1

And which are under 100 km?

the_dat_send_rec %>%
  filter(distance < 100) %>%
  group_by(CallSign, Reporter, distance) %>%
  summarise(Spots = n()) %>%
  arrange(desc(Spots)) %>%
  kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
CallSign Reporter distance Spots
M0INF GB0SNB/SDR 32 58

Try two days (now there’s more data)

two_day_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-03T00:00:00Z'
   AND wd_time <  '2021-01-05T00:00:00Z';")
two_dat_send_rec <- dbFetch(two_day_send_receive)
dbClearResult(two_day_send_receive)
two_dat_send_rec <- two_dat_send_rec %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))
two_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  stat_smooth(geom="line",
              alpha = 0.5, size = 0.8,
              method = "gam",
              formula = y ~ s(x, k = 25)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3-4 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

The red curve doesn’t make sense since there is missing data which should essentially be zero. For now, let’s remove it…

two_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3-4 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Again on the 40 m band

day_40m_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-05T00:00:00Z'
   AND wd_time <  '2021-01-06T00:00:00Z';")
day_40m_send_receive_dat <- dbFetch(day_40m_send_receive)
dbClearResult(day_40m_send_receive)
day_40m_send_receive_dat <- day_40m_send_receive_dat %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))
day_40m_send_receive_dat %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "5 Jan 2021, 40m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Track signal strength by reporter over time…

First, look at data where there’s more than one report.

dat_sent_many <- the_dat %>%
  group_by(Reporter) %>%
  summarise(spots = n()) %>%
  arrange(desc(spots)) %>%
  slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many %>%
  kable()
Reporter spots
GB0SNB/SDR 58
IW2NKE 22
YU/G8ZMF 10
SA2KHG 8
OE1WYC 7
IZ7VHF/RX 6
UA3245SWL 6
the_dat %>%
  mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
  filter(Reporter %in% unique(dat_sent_many$Reporter)) %>%
  ggplot(aes(wd_time, dB, colour = ReportDist)) +
  geom_smooth(span = 1, se = FALSE) +
  labs(x = "Time", y = "Signal report (dB)",
       title = "WSPR reports (20m) – signal strength",
       colour = "Reporter") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Try again for latest 40m data:

one_day_40m <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE \"CallSign\" = 'M0INF'
         AND wd_time >= '2021-01-05T00:00:00Z'
         AND wd_time < '2021-01-06T00:00:00Z';")
dat_40m <- dbFetch(one_day_40m)
dbClearResult(one_day_40m)
dat_sent_many_40 <- dat_40m %>%
  group_by(Reporter) %>%
  summarise(spots = n()) %>%
  arrange(desc(spots)) %>%
  #filter(spots >= 5)
  slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many_40 %>%
  kable()
Reporter spots
OE9HLH 36
OE9GHV 33
HB9TMC 32
F4GFZ 22
DK8FT/A 21
IW2NKE 21
DK2DB 20
dat_40m %>%
  mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
  filter(Reporter %in% unique(dat_sent_many_40$Reporter)) %>%
  ggplot(aes(wd_time, dB, colour = ReportDist)) +
    geom_smooth(span = 2, se = FALSE, size = 0.8) +
    geom_point(size = 0.6, alpha = 0.5) +
    labs(x = "Time", y = "dB",
         title = "WSPR reports (40m) – signal strength",
         subtitle = "A selection of reporters on 5 Jan 2021",
         colour = "Reporter") +
    scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Last hour sent/received from IO91

Jitter plot

Here are the WSPR frequencies, for labeling the plot in a moment.

wspr_bands <- tibble(
  MHz = c(
    0.136,
    0.4742,
    1.8366,
    3.5686,
    5.2872,
    5364.7,
    7.0386,
    10.1387,
    14.0956,
    18.1046,
    21.0946,
    24.9246,
    28.1246,
    50.293,
    70.091,
    144.489,
    432.300,
    1296.500
  ),
  m = 299.792458 / MHz,
  wavelength_text = ifelse(m >= 1,
                           paste(round(m, 0), "m"),
                           paste(round(m*100, 0), "cm"))
) %>%
  arrange(MHz)

wspr_bands %>% kable()
MHz m wavelength_text
0.1360 2204.3563088 2204 m
0.4742 632.2067862 632 m
1.8366 163.2323086 163 m
3.5686 84.0084229 84 m
5.2872 56.7015543 57 m
7.0386 42.5926261 43 m
10.1387 29.5691221 30 m
14.0956 21.2685134 21 m
18.1046 16.5589109 17 m
21.0946 14.2118105 14 m
24.9246 12.0279747 12 m
28.1246 10.6594390 11 m
50.2930 5.9609182 6 m
70.0910 4.2771891 4 m
144.4890 2.0748462 2 m
432.3000 0.6934824 69 cm
1296.5000 0.2312321 23 cm
5364.7000 0.0558824 6 cm

Grab the data:

last_whispers <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"ReporterGrid\" LIKE 'IO91%'
          OR \"Grid\" LIKE 'IO91%')
   AND wd_time > NOW() - INTERVAL '1 hour';")
the_dat <- dbFetch(last_whispers)
dbClearResult(last_whispers)
nrow(the_dat)
## [1] 3340

Group into WSPR bands:

joined_dat <- the_dat %>%
  difference_left_join(wspr_bands,
                       by = "MHz",
                       max_dist = 0.2,
                       distance_col = "dist_band_start") %>%
  mutate(MHz.y = as.factor(MHz.y))

Check how many kHz off the match is (also look for NAs):

summary(joined_dat$dist_band_start*1000)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.364   1.475   1.508   2.355   1.550  79.091

Plot:

joined_dat %>%
  ggplot(aes(distance, MHz.y, colour = MHz.y)) +
  geom_jitter(height = 0.05, alpha = 1/3, size = 1) +
  xlab("Distance (km)") +
  ylab("Freq (MHz)") +
  labs(title = "WSPR spots IO91 (sent or received)",
       subtitle = paste0("From ", min(the_dat$wd_time),
                         " to ", max(the_dat$wd_time))) + 
  theme(legend.position = "none")

Try again, but this time separating the transmissions and receptions in IO91.

joined_dat <- joined_dat %>%
  mutate(direction = ifelse(grepl(
    pattern = "IO91",
    x = Grid,
    ignore.case = TRUE
  ), "Sent from IO91", "Received in IO91"))
joined_dat %>%
  ggplot(aes(distance, MHz.y, colour = direction)) +
  geom_point(position = position_jitterdodge(jitter.width = .1,
                                             jitter.height = 0,
                                             dodge.width = 0.4),
             
             alpha = 1/3, size = 1) +
  xlab("Distance (km)") +
  ylab("Freq (MHz)") +
  labs(title = "WSPR spots IO91 (sent or received)",
       subtitle = paste0("From ", min(the_dat$wd_time),
                         " to ", max(the_dat$wd_time)),
       colour = "Direction")

Polar plot

Let’s do the same again but also showing the direction, for transmissions from IO91 only.

tx_only <- joined_dat %>%
  filter(grepl(pattern = "IO91", x = Grid, ignore.case = TRUE))
linear_scale_polar <- tx_only %>%
  ggplot(aes(azimuth, distance, colour = MHz.y)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "WSPR spots IO91",
    subtitle = paste0("From ", min(the_dat$wd_time),
                      " to ", max(the_dat$wd_time)),
    colour = "Freq (MHz)",
    x = NULL,
    y = "Distance"
  ) +
  scale_x_continuous(
    limits = c(0, 360),
    breaks = seq(0, 315, by = 45),
    minor_breaks = seq(0, 360, by = 15)
  ) +
  coord_polar()

linear_scale_polar

linear_scale_polar +
  scale_y_continuous(trans = "log10")
## Warning: Transformation introduced infinite values in continuous y-axis

tx_only %>%
  filter(distance > 8000) %>%
  select(CallSign, Reporter, distance, azimuth, MHz.x) %>%
  kable()
CallSign Reporter distance azimuth MHz.x
G4LRP PT2FHC 8731 226 14.09706

A bigger one - 48 hours of spots on all frequencies to/from IO91

IO91_48hrs <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"ReporterGrid\" LIKE 'IO91%'
          OR \"Grid\" LIKE 'IO91%')
         AND wd_time >= '2021-01-05T00:00:00Z'
         AND wd_time < '2021-01-07T00:00:00Z';")
IO91_48hrs_dat <- dbFetch(IO91_48hrs)
dbClearResult(IO91_48hrs)
nrow(IO91_48hrs_dat)
## [1] 189067
IO91_48hrs_dat_bands <- IO91_48hrs_dat %>%
  difference_left_join(wspr_bands,
                       by = "MHz",
                       max_dist = 0.1,
                       distance_col = "dist_band_start") %>%
  mutate(
    MHz.y = as.factor(MHz.y),
    direction = ifelse(
      grepl(
        pattern = "IO91",
        x = Grid,
        ignore.case = TRUE
      ),
      "From IO91",
      "To IO91"
    )
  )
IO91_48hrs_dat_bands %>%
  filter(dist_band_start > 5/1000) %>%
  select(CallSign, Reporter, MHz.x, MHz.y, dist_band_start) %>%
  arrange(desc(dist_band_start))  %>%
  datatable(options = dt_options)
IO91_48hrs_dat_bands %>%
  filter(is.na(MHz.y)) %>%
  select(CallSign, Reporter, MHz.x, MHz.y, dist_band_start) %>%
  datatable(options = dt_options)
summary(IO91_48hrs_dat_bands$dist_band_start*1000)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.362   1.456   1.505   2.619   1.555  79.092       2
IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y)) %>%
  ggplot(aes(x = distance, fill = MHz.y)) +
  geom_histogram(aes(y=..density..), binwidth = 500, position = "identity", alpha = 0.5) +
  facet_grid(rows = vars(direction)) +
  labs(x = "Distance (km)", y = "Mass", fill = "Freq (MHz)")

date_format <- "%d %b %Y (%H:%m)"
IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y)) %>%
  ggplot(aes(x = distance, fill = MHz.y)) +
  geom_histogram(
    #aes(y = ..density..),
    binwidth = 200,
    position = "identity",
  ) +
  facet_grid(cols = vars(direction), rows = vars(MHz.y)) +
  labs(
    x = "Distance (km)",
    y = "Mass",
    fill = "Freq (MHz)",
    title = "WSPR reports to/from IO91 by band",
    subtitle = paste0(format(
      min(IO91_48hrs_dat_bands$wd_time), date_format
    ), " to ",
    format(
      max(IO91_48hrs_dat_bands$wd_time), date_format
    ))
  ) +
  theme(legend.position = "none")

Polar map

IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y) & direction == "From IO91") %>%
  ggplot(aes(azimuth, y = distance)) +
  geom_point() +
  facet_grid(rows = vars(MHz.y)) +
  labs(
    x = "Distance (km)",
    y = "Mass",
    fill = "Freq (MHz)",
    title = "WSPR reports from IO91 by band",
    subtitle = paste0(format(
      min(IO91_48hrs_dat_bands$wd_time), date_format
    ), " to ",
    format(
      max(IO91_48hrs_dat_bands$wd_time), date_format
    ))
  ) +
  scale_x_continuous(
    limits = c(0, 360),
    breaks = seq(0, 315, by = 45),
    minor_breaks = seq(0, 360, by = 15)
  ) +
  coord_polar()

Plot on maps

IO91_48hrs_dat_bands_sf <- IO91_48hrs_dat_bands %>%
  st_as_sf(coords = c("wd_rx_lon", "wd_rx_lat"),
           crs = 4326)

This step is rather slow…

t1 <- Sys.time()
IO91_48hrs_dat_bands_sf %>%
  filter(!is.na(MHz.y)) %>%
  ggplot() +
    geom_sf(data = World) +
    geom_sf(aes(colour = MHz.y)) +
    facet_grid(rows = vars(MHz.y)) +
    theme_classic() +
    labs(
      colour = "Freq (MHz)",
      title = "WSPR reports from IO91 by band",
      subtitle = paste0(format(
        min(IO91_48hrs_dat_bands$wd_time), date_format
      ), " to ",
      format(
        max(IO91_48hrs_dat_bands$wd_time), date_format
      ))
    ) +
    theme(legend.position = "none")

t2 <- Sys.time()
t2 - t1
## Time difference of 1.945946 mins